 

### -------------------------------------------------- #
### ---- ASSIGN KEY VARS & SET UP ---- 
### -------------------------------------------------- #

### Start from a clean global environment (removes all visible objects;
### attached packages are handled separately below)
rm(list = ls())

### Libraries: unload what is currently attached, then load this script's
### dependencies through pacman
pacman::p_unload(all)
pacman::p_load(
  pacman, tidyverse, data.table, lazyeval, magrittr, lubridate, here,
  foreign, haven, readstata13 # Stata file import/export
)
### Declare where this script sits relative to the project root so that the
### here() paths used below resolve consistently
i_am("Survey and App Match/115_remove_unwanted_matches.R")


### -------------------------------------------------- #
### ---- Read in Dedup strategy 10 data  ---- 
### -------------------------------------------------- #

# Load the dedup-strategy-10 match file; the recoded version is written back
# to this same path in the export section.
dat <- read.csv(here("Survey and App Match", "Temp_Data", "4-13_comb_tfa_matches_clean_merge_m1_dedup10.csv"))



### ---------------------------------------------------------- #
### ---- Identify and recode matches from other sources  ---- 
### ---------------------------------------------------------- #

# good_or_no_match marks rows whose match information is kept as-is:
#   1 = matched through one of the accepted sources (ca / uni / cell flag),
#       or not matched at all (state_vf_matched is the empty string)
#   0 = matched through some other source; votes, flags and the match
#       indicator for these rows are switched off below

# zero by default
dat$good_or_no_match <- 0

# set to 1 if matched to a good source
# (rows where the combined condition evaluates to NA are skipped and stay 0)
dat$good_or_no_match[
  dat$vf_match_state_ca_flag == 1 |
    dat$vf_match_state_uni_flag == 1 |
    dat$vf_match_state_cell_flag == 1
] <- 1

# set to 1 if unmatched
dat$good_or_no_match[dat$state_vf_matched == ""] <- 1

# `.x` (not `.`) is used inside the lambdas so the column being transformed
# cannot be confused with the magrittr pipe placeholder; TRUE (not T) is used
# as the fallback because T is an ordinary, reassignable variable.
dat <- dat %>%
  # vote columns must be numeric before they can be compared against 0
  mutate(across(contains("_vote_"), as.numeric)) %>%
  # set every non-zero vote to 0 if it's not a good match;
  # NA votes fall through to the default branch and stay NA
  mutate(across(
    contains("_vote_"),
    ~ case_when(
      good_or_no_match == 0 & .x != 0 ~ 0,
      TRUE ~ .x
    )
  )) %>%
  # turn flags off for rows that are not a good match
  mutate(across(
    contains("_flag"),
    ~ case_when(
      good_or_no_match == 0 ~ 0L,
      TRUE ~ .x
    )
  )) %>%
  # blank the match indicator so these rows look unmatched downstream
  mutate(state_vf_matched = case_when(
    good_or_no_match == 0 ~ "",
    TRUE ~ state_vf_matched
  ))


### -------------------------------------------------- #
### ---- Export Data  ---- 
### -------------------------------------------------- #

# Overwrite the dedup10 file read at the top of this script with the recoded
# data. row.names = FALSE stops write.csv() from prepending an unnamed
# row-index column, which read.csv() would otherwise pick up as an extra "X"
# column (and "X.1", "X.2", ... on repeated runs) when the file is read again.
write.csv(
  dat,
  here("Survey and App Match", "Temp_Data", "4-13_comb_tfa_matches_clean_merge_m1_dedup10.csv"),
  row.names = FALSE
)



### -------------------------------------------------- #
### ---- Read in all-applicant flag data  ---- 
### -------------------------------------------------- #

# load flags data (restores the saved objects into the workspace;
# tfa_dat_flag is the one used below)
load(here("Survey and App Match", "Temp_Data", "tfa_to_state_ALL_flags_v2.RData"))
# load survey data
survey <- read_dta(here("Data", "tfa_data.dta"))
# load career data
teacher <- read.csv(here("Data", "teacher_deidentified.csv"))

# birth year taken from the survey's date of birth, parsed as day-month-year
survey$birthyear <- year(dmy(survey$dateofbirth))

# merge survey onto the flag data by personid. Where both tables share a
# column name, the flag-data copy keeps its name and the survey copy gets the
# ".y" suffix and is dropped. ends_with() is used rather than contains()
# because contains(".y") matches a literal ".y" anywhere in a column name and
# could silently drop unrelated columns.
tfa_dat_flag <- left_join(tfa_dat_flag, survey, by = "personid", suffix = c("", ".y")) %>%
  select(-ends_with(".y"))

# merge career data by responseid; the teacher key is converted to character
# first (presumably to match the key's type in tfa_dat_flag -- confirm)
tfa_dat_flag <- left_join(
  tfa_dat_flag,
  teacher %>% mutate(responseid = as.character(responseid)),
  by = "responseid"
)

# yob: last two characters of dateofbirth prefixed with "19"; values that are
# not above 1900 (i.e. a "00" suffix) or that fail numeric conversion become NA.
# NOTE(review): this assumes every applicant was born in the 1900s -- a birth
# year of 2001 would be recorded as 1901. Confirm against the data.
tfa_dat_flag$yob <- as.numeric(paste0("19", str_sub(tfa_dat_flag$dateofbirth, -2)))
tfa_dat_flag$yob <- ifelse(tfa_dat_flag$yob > 1900, tfa_dat_flag$yob, NA)

# select columns needed for analysis
tfa_dat_flag <- tfa_dat_flag %>%
  select(
    personid, appyear, dispositionstep, appethnicity, birthyear, gender, state_ca,
    receivedpellgrants, career1b, AppEthnicity, ReceivedPellGrants, started, matriculated4, yob
  )

# NOTE(review): this writes the reduced column set back to the same .RData
# file that was loaded at the start of this section, replacing its contents.
save(tfa_dat_flag, file = here("Survey and App Match", "Temp_Data", "tfa_to_state_ALL_flags_v2.RData"))
